# Load the accident training data; the CSV's first row supplies column names.
acc <- read.csv(file = "./train/accident_train.csv", header = TRUE)

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build up a FIPS code for the choroplethr map.
# First check how many records carry a 'blank' (zero) county code: 32.
# filter() keeps only rows where the condition is TRUE, so rows with a missing
# COUNTY are dropped instead of being indexed in as all-NA rows and counted.
# COUNTY is compared as a number rather than against the string '0'.
count(filter(acc, COUNTY == 0))
## Source: local data frame [1 x 1]
## 
##       n
##   (int)
## 1    32
# Trial run on the first few rows: zero-pad COUNTY to three digits, then glue
# it onto STATE to form the FIPS code.
h <- head(acc)
newh <- mutate(
  h,
  newCounty = sprintf("%03d", COUNTY),
  FIPS = interaction(STATE, newCounty, sep = "")
)

# Same FIPS construction applied to the full data set. FIPS is a factor whose
# labels are STATE followed by the zero-padded three-digit county code.
acc.FIPS <- mutate(
  acc,
  newCounty = sprintf("%03d", COUNTY),
  FIPS = interaction(STATE, newCounty, sep = "")
)

# Split the records into two data frames on the DRUNK_DR flag.
# NOTE(review): the comparison is against the strings 'True' / 'False'; if
# read.csv ever parses DRUNK_DR as logical these filters would match nothing --
# confirm the column type.
acc.F.drunk <- acc.FIPS %>% filter(DRUNK_DR == "True")
acc.F.notdrunk <- acc.FIPS %>% filter(DRUNK_DR == "False")

# Count the rows falling under each FIPS code; the result has one row per
# FIPS with the row count in `total`.
# NOTE(review): n() counts rows, it does not sum a fatalities column, although
# the plots below are titled "Total Fatalities" -- confirm this is intended.
count_by_fips <- function(df) {
  df %>%
    group_by(FIPS) %>%
    summarise(total = n())
}

visDrunk <- count_by_fips(acc.F.drunk)

visNotDrunk <- count_by_fips(acc.F.notdrunk)

# Rename the columns to "region" and "value", then turn the factor "region"
# into numbers by way of its labels (not its internal integer codes).
names(visDrunk) <- c("region", "value")
names(visNotDrunk) <- c("region", "value")

visDrunk$region <- as.numeric(as.character(visDrunk$region))
visNotDrunk$region <- as.numeric(as.character(visNotDrunk$region))

# Now some plotting
library(choroplethr)
library(ggplot2)
library(viridis)

# First approach: build the CountyChoropleth object by hand, then override its
# polygon layer and fill scale before rendering.
choroD <- CountyChoropleth$new(visDrunk)
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 2000, 12000, 19000, 26000, 35000,
## 51000, 2201, 2280, 25997, 12999
choroD$title <- "Drunk driver - Total Fatalities"
choroD$set_num_colors(1)
# Borderless polygons filled by `value`.
choroD$ggplot_polygon <- geom_polygon(aes(fill = value), color = NA)
# 32-step viridis gradient with fixed scale limits of 0 to 1500.
choroD$ggplot_scale <- scale_fill_gradientn(
  name = "Total number",
  colours = viridis(32),
  limits = c(0, 1500)
)
choroD$render()
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 48045, 48431, 31011, 35021, 2050, 2105, 49009, 30019,
## 20187, 48301, 38041, 49055, 2188, 2198, 46057, 12067, 46129, 15005, 35011,
## 2100, 48205, 19063, 51830, 20101, 48101, 48345, 51580, 51610, 51640, 51685,
## 51750, 38023, 46089, 31091, 26083, 48129, 51735, 31113, 20199, 20071,
## 31171, 31183, 30051, 31073, 31085, 31117, 46043, 48191, 48263, 48275,
## 51530, 51595, 8095, 2195, 2230, 2013, 8061, 2275, 8091, 5013, 19053

# Another way of plotting. This seems clearer.
# National county map of the drunk-driver counts.
visDrunk %>%
  county_choropleth(
    title = "Drunk driver - Total Fatalities",
    legend = "Number"
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 2000, 12000, 19000, 26000, 35000,
## 51000, 2201, 2280, 25997, 12999
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 48045, 48431, 31011, 35021, 2050, 2105, 49009, 30019,
## 20187, 48301, 38041, 49055, 2188, 2198, 46057, 12067, 46129, 15005, 35011,
## 2100, 48205, 19063, 51830, 20101, 48101, 48345, 51580, 51610, 51640, 51685,
## 51750, 38023, 46089, 31091, 26083, 48129, 51735, 31113, 20199, 20071,
## 31171, 31183, 30051, 31073, 31085, 31117, 46043, 48191, 48263, 48275,
## 51530, 51595, 8095, 2195, 2230, 2013, 8061, 2275, 8091, 5013, 19053

# National county map of the non-drunk-driver counts.
visNotDrunk %>%
  county_choropleth(
    title = "Non-Drunk driver - Total Fatalities",
    legend = "Number"
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 1000, 2000, 4000, 10000, 13000,
## 19000, 28000, 30000, 35000, 42000, 48000, 2201, 2280, 4999, 30999
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 38065, 31071, 31075, 2105, 30019, 21201, 2198, 46107, 15005,
## 51620, 2016, 2060, 51580, 51678, 51685, 27155, 25019, 2282, 48155, 51735,
## 31009, 31117, 51530, 2195, 2230, 2013, 2275

# Drunk-driver counts zoomed to Michigan, drawn over a reference map.
visDrunk %>%
  county_choropleth(
    title = "Michigan Drunk driver - Total Fatalities",
    legend = "Number",
    state_zoom = "michigan",
    reference_map = TRUE
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 2000, 12000, 19000, 26000, 35000,
## 51000, 2201, 2280, 25997, 12999
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 26083
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=45.206845,-85.913497&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_rect).

# Non-drunk-driver counts zoomed to Michigan, drawn over a reference map.
visNotDrunk %>%
  county_choropleth(
    title = "Michigan Non-Drunk driver - Total Fatalities",
    legend = "Number",
    state_zoom = "michigan",
    reference_map = TRUE
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 1000, 2000, 4000, 10000, 13000,
## 19000, 28000, 30000, 35000, 42000, 48000, 2201, 2280, 4999, 30999
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=45.206845,-85.913497&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_rect).

# Drunk-driver counts zoomed to California, drawn over a reference map.
visDrunk %>%
  county_choropleth(
    title = "California Drunk driver - Total Fatalities",
    legend = "Number",
    state_zoom = "california",
    reference_map = TRUE
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 2000, 12000, 19000, 26000, 35000,
## 51000, 2201, 2280, 25997, 12999
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.010625,-120.716466&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_rect).

# Non-drunk-driver counts zoomed to California, drawn over a reference map.
visNotDrunk %>%
  county_choropleth(
    title = "California Non-Drunk driver - Total Fatalities",
    legend = "Number",
    state_zoom = "california",
    reference_map = TRUE
  )
## Warning in super$initialize(map.df, user.df): Your data.frame contains the
## following regions which are not mappable: 1000, 2000, 4000, 10000, 13000,
## 19000, 28000, 30000, 35000, 42000, 48000, 2201, 2280, 4999, 30999
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=37.010625,-120.716466&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_rect).